## Generalized Synthetic Control Method
## Replication Materials

##Function to create simulated sampes
## Revised based on Bai (2009)

## Author: Yiqing Xu

load("FLSource.RData") ## fixed factors and loadings

simulate<-function(Ntr, Nco, T0, p, r, m=0, w=1, D.sd=1, att=c(1:10),
                   beta=NULL, mu=0, fixF=FALSE, fixL=FALSE, fsize=1,
                   FE=0, seed=NULL,unif=FALSE,AR1=0) {
  
    ## p, number of covariates (p=0, no covariates)
    ## Ntr, Nco, N
    ## T0, T
    ## r
    ## m: number of Z
    ## overlap w = [0,1], w==1 means complete overlap
    ## D.sd: heterogeneity
    ## beta: true coefficients for X
    ## att: average treatment effect
    ## fsize: relative importance of factors vs. covariates
    ## trend: importance of a linear time trend 
    ## FE: to include unit and time fixed effects
    ## fixF: factors as given
    ## fixL: loadings as given
    ## AR1: Autoregressive(1) coefficient

    if (is.null(seed)==FALSE) {
        set.seed(seed)
    }
    N<-Ntr+Nco
    T<-T0+length(att)
    
  
    Tr<-1:Ntr   # treatment
    Co<-(Ntr+1):N # control
    pre<-1:T0
    pst<-c(1:T)[-pre]

    ## ###########################
    ## Data generating process
    ## ###########################
    
    rr<-m + r # observed and unobserved

    ## loadings: get 10 (for the construction of X), use the first 1:r columns
    ss<-sqrt(3) # to ensure variance =1
    if (rr > 0) {
        if (fixL==FALSE) {
            lambda<-matrix(runif(N*rr,min=-ss,max=ss),N,rr)  # loadings     
        } else {
            lambda<-L.source[c(c(1:Ntr),c(501:(500+Nco))),1:rr]
        } 
        lambda[1:Ntr,] <- lambda[1:Ntr,]+(1-w)*2*ss # overlap of loadings, w determines the shift of the uniform distribution
    }
    
    ## factors
    if (fixF==FALSE) { # F not given
        factor<-matrix(rnorm(T*rr),T,rr)   # factors
    } else {
        if (unif==FALSE) {
            factor<-F.source[(dim(F.source)[1]-T+1):dim(F.source)[1],1:rr]
                                        # always use the last T rows, first rr columns
        }  else { # uniform distribution 
            factor<-F.u.source[(dim(F.u.source)[1]-T+1):dim(F.u.source)[1],1:rr] 
        }
    } 
    
    ## fixed effects
    if (FE==1) {
        ## unit fixed effects
        if (fixL==FALSE) {
            alpha<-runif(N,min=-ss,max=ss)
        } else {
            alpha <- L.source[c(c(1:Ntr),c(501:(500+Nco))),20] # the last column
        }
        alpha[1:Ntr]<-alpha[1:Ntr]+(1-w)*2*ss    
        ## time fixed effects
        if (fixF==FALSE) {
            xi<-rnorm(T,0,1) 
        } else {
            if (unif==FALSE) {
                xi<-F.source[(dim(F.source)[1]-T+1):dim(F.source)[1],20] # the 20th (last) column
            }  else {
                xi<-F.u.source[(dim(F.u.source)[1]-T+1):dim(F.u.source)[1],20] # the 20th (last) column
            }
            
        }  
    }

    ## error
    e <- matrix(rnorm(T*N),T,N) # disturbances
    
    ## time varying covariates: always generated by the same first two factors
    truebeta<-beta
    if (p!=0) {
        X<-array(0,dim=c(T,N,p))    # regressor matrix, must be T by N  by  p 
        for (j in 1:p) {
            X[,,j] <- matrix(rnorm(T*N),T,N)  +
                0.5 * factor[,1:2] %*% t(lambda[,1:2]) +
                0.25 * matrix(1,T,2) %*% t(lambda[,1:2]) +
                0.25 * factor[,1:2] %*% matrix(1,2,N)+1
        }
    }  
    
    ## treatment assignment
    D<-cbind(rbind(matrix(0,T0,Ntr),matrix(1,(T-T0),Ntr)),matrix(0,T,Nco)) 
    
    ## the treatment effect
    eff<-matrix(c(rep(0,T0),att),T,N)+rbind(matrix(0,T0,N),matrix(rnorm((T-T0)*N,0,D.sd),(T-T0),N))
    
    ## outcome variable
    Y0<- e  +  matrix(mu,T,N) # error + grand mean  
    if (r>0) {
        Y0 <- Y0 + fsize*factor%*%t(lambda)
    } 
    if (FE==1) {
        Y0 <- Y0 + matrix(alpha,T,N,byrow=TRUE) + matrix(xi,T,N,byrow=FALSE)
    }
    if (p!=0) { # covariates
        for (k in 1:p) {
            Y0<-Y0+X[,,k]*truebeta[k]
        }
    }
    Y1 <- Y0 + eff
    if (AR1>0) {
        for (t in 2:T) {
            Y0[t,]<-Y0[(t-1),]*AR1 + Y0[t,]
            Y1[t,]<-Y1[(t-1),]*AR1 + Y1[t,]
        }
        eff.acc<-Y1-Y0 # accumulative effect, T*N matrix
    }  
    Y<-(matrix(1,T,N)-D)*Y0+D*Y1 
    if (AR1>0) {
        Y.lag<-matrix(NA,T,N); Y.lag[2:T,]<-Y[1:(T-1),]
    }

    ## substract error
    Y.bar <- Y - e
    
    ## panel structure
    panel<-as.data.frame(cbind(rep(101:(100+N),each=T),rep(1:T,N),
                               c(Y),c(Y0),c(Y1),c(D),c(eff),c(e),c(Y.bar),
                               rep(mu,T*N),rep(1,T*N),
                               c(rep(1,T*Ntr),rep(0,T*Nco))))
    cname<-c("id","time","Y","Y0","Y1","D","eff","error","Ybar","mu","X0","treat")
    if (p!=0) {
        for (i in 1:p) {
            panel<-cbind(panel,c(X[,,i]))
            cname<-c(cname,paste("X",i,sep=""))
        }
    }
    if (rr > 0) {
        for (i in 1:rr) {
            panel<-cbind(panel,rep(factor[,i],N))
            cname<-c(cname,paste("F",i,sep=""))
        }
    }
    if (m > 0) {
        for (i in 1:m) {
            panel<-cbind(panel,rep(lambda[,i],each=T))
            cname<-c(cname,paste("Z",i,sep=""))
        }
    }
    if (r > 0) {
        for (i in 1:r) {
            panel<-cbind(panel,rep(lambda[,(m+i)],each=T))
            cname<-c(cname,paste("L",i,sep=""))
        }
    } 
    if (FE==1) {
        panel<-cbind(panel,rep(alpha,each=T),rep(xi,N))
        cname<-c(cname,"alpha","xi")
    }  
    if (AR1>0) {
        panel<-cbind(panel,c(Y.lag),c(eff.acc))
        cname<-c(cname,"Y_lag","eff_acc")
    } 
    colnames(panel)<-cname
    return(panel)
}


